Note that the scone scores have already been multiplied by (+/-)1 such that a high score corresponds to a good normalization.
sconeResultFiles <- list.files("../../objects",
pattern = "*scores.rds$", full.names = TRUE)
sconeResultFilesShort <- list.files("../../objects",
pattern = "*scores.rds$")
datasets <- unlist(lapply(strsplit(sconeResultFilesShort, split="_"),"[[",1))
datasets[datasets %in% "geschwind2018"] <- "torre-ubieta2018"
scores <- sapply(1:length(sconeResultFiles), function(ii) readRDS(sconeResultFiles[ii]))
names(scores) <- datasets
# remove other methods
rmMethods <- c("gcqn_mean", "gcqn_median_10", "gcqn_median_20", "gcqn_median_100",
"gcqn_median_permuted")
scores <- lapply(scores, function(scoreMat){
scoreNames <- rownames(scoreMat)
rmID <- unlist(sapply(rmMethods, function(method){
grep(x=scoreNames, pattern=method)
}))
return(scoreMat[-rmID,])
})
# remove unused metrics
rmMetrics <- c("RLE_MED", "RLE_IQR", "rleGC_iqr")
scores <- lapply(scores, function(scoreMat){
scoreColNames <- colnames(scoreMat)
rmID <- unlist(sapply(rmMetrics, function(metric){
grep(x=scoreColNames, pattern=metric)
}))
return(scoreMat[,-rmID])
})
# focus on no_uv
keepUV <- "no_uv"
scores <- lapply(scores, function(scoreMat){
scoreNames <- rownames(scoreMat)
keepID <- grep(x=scoreNames, pattern=keepUV)
return(scoreMat[keepID,])
})
# rank scores: high rank is good score according to that metric.
rankedScores <- lapply(scores, function(scoreMat){
apply(scoreMat,2,rank)
})
simulationResultFiles <- list.files("../../objects",
pattern = "simulationResults", full.names = TRUE)
simulationResultFilesShort <- list.files("../../objects",
pattern = "simulationResults")
datasets <- unlist(lapply(strsplit(simulationResultFilesShort, split="_"),"[[",1))
datasets[datasets %in% "geschwind2018"] <- "torre-ubieta2018"
simulationScores <- sapply(1:length(simulationResultFiles), function(ii){
readRDS(simulationResultFiles[ii])
}, simplify = FALSE)
names(simulationScores) <- datasets
# extract cbd
cbdList <- list()
for(kk in 1:length(simulationScores)){
cbdList[[kk]] <- simulationScores[[kk]][[2]]$cbd
}
names(cbdList) <- datasets
# remove unused metrics
rmMetricsSim <- c("aufdptpr", "cbd")
simulationScores <- lapply(simulationScores, function(scoreList){
scoreList[[2]] <- scoreList[[2]][!names(scoreList[[2]]) %in% rmMetricsSim]
return(scoreList)
})
# high is good
simulationScores <- lapply(simulationScores, function(simScoreList){
simScoreList[[1]]$fpr <- -abs(simScoreList[[1]]$fpr - 0.05)
simScoreList[[1]]$distUnifPvalMock <- -simScoreList[[1]]$distUnifPvalMock
simScoreList[[1]]$helDistVarGC <- -simScoreList[[1]]$helDistVarGC
simScoreList[[2]]$DAGCDistDiff <- -simScoreList[[2]]$DAGCDistDiff
return(simScoreList)
})
simulationScoreMats <- lapply(simulationScores, function(simScoreList){
oneList <- c(simScoreList[[1]], simScoreList[[2]])
# ensure same order
nameOrder <- names(oneList[[1]])
oneList <- lapply(oneList, function(x) x[nameOrder])
scoreMat <- t(do.call(rbind, oneList))
return(scoreMat)
})
simulationScoreRanks <- lapply(simulationScoreMats, function(scoreMat){
apply(scoreMat,2,rank)
})
rankedScores <- lapply(rankedScores, function(scoreMat){
rownames(scoreMat) <- unlist(lapply(strsplit(rownames(scoreMat), split=","), "[[", 2))
rownames(scoreMat)[rownames(scoreMat) == "gcqn_median_50"] <- "gcqn"
rownames(scoreMat)[rownames(scoreMat) == "fq"] <- "FQ"
if("cqn_length" %in% rownames(scoreMat)){
scoreMat <- scoreMat[!rownames(scoreMat) == "cqn",]
rownames(scoreMat)[rownames(scoreMat) == "cqn_length"] <- "cqn"
}
rownames(scoreMat)[rownames(scoreMat) == "tmm"] <- "TMM"
rownames(scoreMat)[rownames(scoreMat) == "deseq"] <- "DESeq2"
return(scoreMat)
})
# available datasets
dats <- names(simulationScoreRanks)
allScoreRanks <- list()
for(dd in 1:length(dats)){
sconeScores <- rankedScores[[dats[dd]]]
simScores <- simulationScoreRanks[[dats[dd]]]
sconeScores <- sconeScores[rownames(simScores),]
allScores <- cbind(sconeScores, simScores)
allScoreRanks[[dd]] <- allScores
}
for(dd in 1:length(allScoreRanks)){
curScores <- allScoreRanks[[dd]]
rn <- rownames(curScores)
rn[rn == "gcqn"] <- "GC-FQ"
rn[rn == "edaseq"] <- "FQ-FQ"
rn[rn == "gcqn_smooth"] <- "GC-FQ_smooth"
rn[rn == "uq"] <- "UQ"
rn[rn == "none"] <- "None"
rn[rn == "sum"] <- "Sum"
rownames(curScores) <- rn
allScoreRanks[[dd]] <- curScores
}
library(ggplot2)
gcNormMethods <- c("cqn", "FQ-FQ", "GC-FQ", "GC-FQ_smooth")
pDat <- list()
for(dd in 1:length(dats)){
curScores <- allScoreRanks[[dd]]
curScores <- curScores[order(rowMeans(curScores)),]
df <- data.frame(rank = c(curScores),
method = rep(rownames(curScores), ncol(curScores)),
metric = rep(colnames(curScores), each=nrow(curScores)))
df$method <- factor(df$method, levels = rownames(curScores))
# introduce class on what metrics assess
df$class <- NA
df$class[df$metric %in% c("BIO_SIL", "BATCH_SIL", "PAM_SIL", "EXP_QC_COR", "EXP_UV_COR")] <- "norm"
df$class[df$metric %in% c("rleGC_med", "helDistVarGC", "DAGCDistDiff")] <- "GC"
df$class[df$metric %in% c("fpr", "distUnifPvalMock", "auroc", "aufdptpr")] <- "DAA"
# shorter name for metrics
df$metric <- as.character(df$metric)
df$metric[df$metric == "BATCH_SIL"] <- "Batch Sil"
df$metric[df$metric == "BIO_SIL"] <- "Bio Sil"
df$metric[df$metric == "DAGCDistDiff"] <- "GC-dist DA"
df$metric[df$metric == "distUnifPvalMock"] <- "p-val unif"
df$metric[df$metric == "EXP_QC_COR"] <- "QC cor"
df$metric[df$metric == "EXP_UV_COR"] <- "UV cor"
df$metric[df$metric == "helDistVarGC"] <- "p-val GC"
df$metric[df$metric == "PAM_SIL"] <- "PAM Sil"
df$metric[df$metric == "rleGC_med"] <- "RLE GC"
df$dataset <- dats[dd]
if(dd == 1){
dfAll <- df
} else {
dfAll <- rbind(dfAll, df)
}
dfNorm <- df[df$class == "norm",]
pNorm <- ggplot(dfNorm, aes(x=metric, y=method, fill=rank)) +
geom_tile() +
scale_fill_gradient2() +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9),
axis.text.y = element_text(size=12, colour=ifelse(levels(df$method) %in% gcNormMethods,
"dodgerblue", "black")),
axis.ticks.y = element_blank()) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Normalization \n performance") #+
# annotate("text", x = -0.2, y = 12, label = dats[dd], size=6) +
# coord_cartesian(ylim = c(0,10), xlim=c(1,length(unique(dfNorm$metric))), clip = "off")
dfDA <- df[df$class == "DAA",]
pDA <- ggplot(dfDA, aes(x=metric, y=method, fill=rank)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue", high="steelblue") +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9)) +
scale_x_discrete(position = "top") +
xlab("Differential analysis \n performance") +
ylab(NULL)
dfGC <- df[df$class == "GC",]
pGC <- ggplot(dfGC, aes(x=metric, y=method, fill=rank)) +
geom_tile() +
scale_fill_gradient2(low = "darkseagreen2", high="darkseagreen4") +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9)) +
scale_x_discrete(position = "top") +
xlab("GC-content bias \n removal") +
ylab(NULL)
pDat[[dd]] <- cowplot::plot_grid(pNorm, pDA, pGC,
nrow = 1,
rel_widths = c(1.5, 1, .85))
}
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
cowplot::plot_grid(plotlist = pDat,
nrow = 4, ncol = 2,
labels=dats,
label_size = 11,
label_x = -0.025)
ggsave("~/Dropbox/research/atacseq/bulk/plots/totalBenchmarkPerDataset.pdf",
height = 14, width = 12)
library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.1.6 ✓ dplyr 1.0.3
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ✓ purrr 0.3.3
## ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# get order across all
avNormRanks <- dfAll %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethods <- as.character(avNormRanks$method[order(avNormRanks$avRank, decreasing=FALSE)])
gcNormMethods <- c("cqn", "FQ-FQ", "GC-FQ", "GC-FQ_smooth")
dfAllNorm <- dfAll[dfAll$class == "norm",]
dfAllNormSummary <- dfAllNorm %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
dfAllNormSummaryNorm <- dfAllNormSummary
pNormAll <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2() +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethods %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Normalization \n performance")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Differential accessibility analysis
dfAllNorm <- dfAll[dfAll$class == "DAA",]
dfAllNormSummary <- dfAllNorm %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
dfAllNormSummaryDA <- dfAllNormSummary
pDAAll <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue", high="steelblue") +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35)) +
scale_x_discrete(position = "top") +
xlab("Differential analysis \n performance") +
ylab(NULL)
## GC-content bias removal
dfAllNorm <- dfAll[dfAll$class == "GC",]
dfAllNormSummary <- dfAllNorm %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
# get order
avNormRanks <- dfAllNormSummary %>%
group_by(method) %>%
summarize(avRank = mean(meanRank))
orderedMethods <- as.character(avNormRanks$method[order(avNormRanks$avRank, decreasing=FALSE)])
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
dfAllNormSummaryGC <- dfAllNormSummary
pGCAll <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2(low = "darkseagreen2", high="darkseagreen4") +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35)) +
scale_x_discrete(position = "top") +
xlab("GC-content bias \n removal") +
ylab(NULL)
cowplot::plot_grid(pNormAll, pDAAll, pGCAll,
nrow = 1,
rel_widths = c(1.5, 1, 1))
ggsave("~/Dropbox/research/atacseq/bulk/plots/totalBenchmarkSummary.pdf",
height = 6, width = 12)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
imgFile <- "/Users/koenvandenberge/Dropbox/research/atacseq/bulk/plots/benchmarkSchemev1.png"
pScheme <- ggdraw() +
draw_image(imgFile,
scale=1)
pHeat <- cowplot::plot_grid(pNormAll, pDAAll, pGCAll,
nrow = 1,
rel_widths = c(1.5, 1, 1))
cowplot::plot_grid(pScheme,
pHeat,
nrow=2,
labels=letters[1:2],
rel_heights = c(3/4, 1))
ggsave("~/Dropbox/research/atacseq/bulk/plots/totalBenchmarkSummary_scheme.pdf",
height = 15, width = 12)
dfAllSummary <- rbind(dfAllNormSummaryNorm,
dfAllNormSummaryDA,
dfAllNormSummaryGC)
dfAllSummSumm <- dfAllSummary %>%
group_by(method) %>%
summarize(meanmeanRank=mean(meanRank))
dfAllSummSumm$global <- factor(rep("global", nrow(dfAllSummSumm)))
pGlobal <- ggplot(dfAllSummSumm, aes(x=global, y=method, fill=meanmeanRank)) +
geom_tile() +
scale_fill_gradient2(low = "darkgoldenrod1", high="darkgoldenrod3") +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35)) +
scale_x_discrete(position = "top") +
xlab("GC-content bias \n removal") +
ylab(NULL)
cowplot::plot_grid(pNormAll, pDAAll, pGCAll, pGlobal,
nrow = 1,
rel_widths = c(1.5, 1, 1, .2))
library(tidyverse)
# get order across all
dfAll_noGC <- dfAll[!dfAll$class == "GC",]
avNormRanks <- dfAll_noGC %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethods <- as.character(avNormRanks$method[order(avNormRanks$avRank, decreasing=FALSE)])
gcNormMethods <- c("cqn", "FQ-FQ", "GC-FQ", "GC-FQ_smooth")
dfAllNorm <- dfAll_noGC[dfAll_noGC$class == "norm",]
dfAllNormSummary <- dfAllNorm %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
pNormAll <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2() +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethods %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Normalization \n performance")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## Differential accessibility analysis
dfAllNorm <- dfAll_noGC[dfAll_noGC$class == "DAA",]
dfAllNormSummary <- dfAllNorm %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
pDAAll <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue", high="steelblue") +
theme(legend.position = "none",
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35)) +
scale_x_discrete(position = "top") +
xlab("Differential analysis \n performance") +
ylab(NULL)
cowplot::plot_grid(pNormAll, pDAAll,
nrow = 1,
rel_widths = c(1.5, 1))
ggsave("~/Dropbox/research/atacseq/bulk/plots/totalBenchmarkSummary_noGC.pdf",
height = 6, width = 10)
# get order across all
dfAllOnlyNorm <- dfAll[dfAll$class == "norm",]
avNormRanks <- dfAllOnlyNorm %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethods <- as.character(avNormRanks$method[order(avNormRanks$avRank, decreasing=FALSE)])
dfAllNormSummary <- dfAllOnlyNorm %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
pNormOnly <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2() +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethods %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Normalization \n performance")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
pNormOnly
# get order across all
dfAllOnlyDA <- dfAll[dfAll$class == "DAA",]
avNormRanks <- dfAllOnlyDA %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethods <- as.character(avNormRanks$method[order(avNormRanks$avRank, decreasing=FALSE)])
dfAllNormSummary <- dfAllOnlyDA %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
pDAOnly <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue", high="steelblue") +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethods %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Differential analysis \n performance")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
pDAOnly
# get order across all
dfAllOnlyGC <- dfAll[dfAll$class == "GC",]
avNormRanks <- dfAllOnlyGC %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethods <- as.character(avNormRanks$method[order(avNormRanks$avRank, decreasing=FALSE)])
dfAllNormSummary <- dfAllOnlyGC %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethods)
dfAllNormSummary$dataset <- factor(dfAllNormSummary$dataset,
levels = c("bryois2018", "calderon2019", "fullard2019",
"murphy2019","torre-ubieta2018",
"philip2017", "rizzardi2019",
"liu2019"))
pGCOnly <- ggplot(dfAllNormSummary, aes(x=dataset, y=method, fill=meanRank)) +
geom_tile() +
scale_fill_gradient2(low = "darkseagreen2", high="darkseagreen4") +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethods %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("GC-content bias \n removal")
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
pGCOnly
cowplot::plot_grid(pNormOnly,
pDAOnly,
pGCOnly,
labels=letters[1:3],
nrow = 1)
ggsave("~/Dropbox/research/atacseq/bulk/plots/totalBenchmark_perComponent.pdf",
width = 12, height = 6)
dfAllOnlyNorm <- dfAll[dfAll$class == "norm",]
dfAllOnlyDA <- dfAll[dfAll$class == "DAA",]
dfAllOnlyGC <- dfAll[dfAll$class == "GC",]
pDatList <- list()
pDatList <- lapply(1:8, function(dd){ #loop across datasets
dfAllOnlyNormDat <- dfAllOnlyNorm[dfAllOnlyNorm$dataset == unique(dfAllOnlyNorm$dataset)[dd],]
avNormRanksDat <- dfAllOnlyNormDat %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethodsDat <- as.character(avNormRanksDat$method[order(avNormRanksDat$avRank, decreasing=FALSE)])
dfAllNormSummary <- dfAllOnlyNormDat %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
dfAllNormSummary$method <- factor(dfAllNormSummary$method, levels=orderedMethodsDat)
dfAllOnlyNormDat$method <- factor(dfAllOnlyNormDat$method, levels=orderedMethodsDat)
pNormOnlyDat <- ggplot(dfAllOnlyNormDat, aes(x=metric, y=method, fill=rank)) +
geom_tile() +
scale_fill_gradient2() +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethodsDat %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Normalization \n performance")
## DAA
dfAllOnlyDADat <- dfAllOnlyDA[dfAllOnlyDA$dataset == unique(dfAllOnlyDA$dataset)[dd],]
avDARanksDat <- dfAllOnlyDADat %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethodsDat <- as.character(avDARanksDat$method[order(avDARanksDat$avRank, decreasing=FALSE)])
dfAllDASummary <- dfAllOnlyDADat %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
dfAllDASummary$method <- factor(dfAllDASummary$method, levels=orderedMethodsDat)
dfAllOnlyDADat$method <- factor(dfAllOnlyDADat$method, levels=orderedMethodsDat)
pDAOnlyDat <- ggplot(dfAllOnlyDADat, aes(x=metric, y=method, fill=rank)) +
geom_tile() +
scale_fill_gradient2(low = "skyblue", high="steelblue") +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethodsDat %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("Differential analysis\nperformance")
# GC-content
dfAllOnlyGCDat <- dfAllOnlyGC[dfAllOnlyGC$dataset == unique(dfAllOnlyGC$dataset)[dd],]
avGCRanksDat <- dfAllOnlyGCDat %>%
group_by(method) %>%
summarize(avRank = mean(rank, na.rm=TRUE))
orderedMethodsDat <- as.character(avGCRanksDat$method[order(avGCRanksDat$avRank, decreasing=FALSE)])
dfAllGCSummary <- dfAllOnlyGCDat %>%
group_by(dataset, method) %>%
summarize(meanRank = mean(rank))
dfAllGCSummary$method <- factor(dfAllGCSummary$method, levels=orderedMethodsDat)
dfAllOnlyGCDat$method <- factor(dfAllOnlyGCDat$method, levels=orderedMethodsDat)
pGCOnlyDat <- ggplot(dfAllOnlyGCDat, aes(x=metric, y=method, fill=rank)) +
geom_tile() +
scale_fill_gradient2(low = "darkseagreen2", high="darkseagreen4") +
theme(legend.position = "none",
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 25, size=9, vjust=.8, hjust=.35),
axis.text.y = element_text(size = 14,
colour=ifelse(orderedMethodsDat %in% gcNormMethods,
"dodgerblue", "black"))) +
scale_x_discrete(position = "top") +
ylab(NULL) +
xlab("GC-content bias \n removal")
pDat <- cowplot::plot_grid(pNormOnlyDat,
pDAOnlyDat,
pGCOnlyDat,
nrow = 1, ncol=3)
curDataset <- datasets[dd]
substr(curDataset,1,1) <- toupper(substr(curDataset,1,1))
pDat <- pDat + geom_text(mapping = aes(x=0.08,y=.95, label=curDataset),
fontface="bold")
return(pDat)
})
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
## `summarise()` has grouped output by 'dataset'. You can override using the `.groups` argument.
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
cowplot::plot_grid(plotlist=pDatList[1:4], nrow=4, ncol=1)
ggsave("../../../../plots/benchmark_allMeasures_perDataset1.pdf",
width=8, height=10)
cowplot::plot_grid(plotlist=pDatList[5:8], nrow=4, ncol=1)
ggsave("../../../../plots/benchmark_allMeasures_perDataset2.pdf",
width=8, height=10)
calcJaccard <- function(x, y){
sum(which(x) %in% which(y)) / length(unique(c(which(x), which(y))))
}
pairwiseJaccard <- function(mat){
combs <- combn(x=1:ncol(mat), m=2)
jacMat <- matrix(NA, nrow=ncol(mat), ncol=ncol(mat))
rownames(jacMat) <- colnames(jacMat) <- colnames(mat)
for(cc in 1:ncol(combs)){
jacMat[combs[1,cc], combs[2,cc]] <- calcJaccard(curde[,combs[1,cc]],
curde[,combs[2,cc]])
}
# make symmetric
diag(jacMat) <- NA
jacMat[lower.tri(jacMat)] <- t(jacMat)[lower.tri(jacMat)]
return(jacMat)
}
jacMatList <- list()
curdeList <- list()
library(iCOBRA)
## This version of bslib is designed to work with shiny version 1.5.0.9007 or higher.
for(kk in 1:length(cbdList)){
curpadj <- apply(pval(cbdList[[kk]]), 2, p.adjust, method="fdr")
curpadj[is.na(curpadj)] <- 1
curde <- curpadj <= 0.05
curdeList[[kk]] <- curde
curJacMat <- pairwiseJaccard(curde)
rn <- rownames(curJacMat)
rn[rn == "gcqn"] <- "GC-FQ"
rn[rn == "edaseq"] <- "FQ-FQ"
rn[rn == "gcqn_smooth"] <- "GC-FQ_smooth"
rn[rn == "uq"] <- "UQ"
rn[rn == "none"] <- "None"
rn[rn == "sum"] <- "Sum"
rownames(curJacMat) <- colnames(curJacMat) <- rn
jacMatList[[kk]] <- curJacMat
}
## average jaccard
avJaccard <- Reduce('+', jacMatList) / length(jacMatList)
library(pheatmap)
diag(avJaccard) <- NA
pheatmap(avJaccard, border=NA)
phavjac <- pheatmap(avJaccard, border=NA, fontsize=18)
changeNames <- function(x){
names(x) <- unlist(lapply(strsplit(names(x), split=","), "[[", 2))
names(x)[names(x) == "gcqn_median_50"] <- "GC-FQ"
names(x)[names(x) == "gcqn_smooth"] <- "GC-FQ_smooth"
names(x)[names(x) == "edaseq"] <- "FQ-FQ"
names(x)[names(x) == "fq"] <- "FQ"
if("cqn_length" %in% names(x)){
x <- x[!names(x) == "cqn"]
names(x)[names(x) == "cqn_length"] <- "cqn"
}
names(x)[names(x) == "tmm"] <- "TMM"
names(x)[names(x) == "deseq"] <- "DESeq2"
names(x)[names(x) == "uq"] <- "UQ"
names(x)[names(x) == "none"] <- "None"
names(x)[names(x) == "sum"] <- "Sum"
return(x)
}
bioSil <- lapply(scores, function(x) x[,"BIO_SIL"])
bioSil <- lapply(bioSil, changeNames)
bioSil <- lapply(bioSil, function(x) x[order(names(x))])
bioSil <- lapply(bioSil, function(x) x - mean(x))
bioSilMat <- do.call(rbind, bioSil)
# avSil <- Reduce('+', bioSil) / length(bioSil)
par(bty='l')
boxplot(bioSilMat, ylab="", ylab="")
## Warning in bxp(list(stats = structure(c(-0.0469545675699299,
## -0.0079496414579699, : Duplicated argument ylab = "" is disregarded
abline(h=0, col="red", lty=2)
library(ggplot2)
bioSilMat <- bioSilMat[,order(apply(bioSilMat,2,median), decreasing=TRUE)]
orderedMethods <- colnames(bioSilMat)
dfLong <- tidyr::gather(as.data.frame(bioSilMat))
colnames(dfLong) <- c("method", "value")
dfLong$method <- factor(dfLong$method, levels=colnames(bioSilMat))
pSil <- ggplot(dfLong, aes(x=method, y=value)) +
theme_classic() +
geom_boxplot() +
geom_hline(yintercept = 0, col="red", lty=2) +
ylab("Deviation from average silhouette") +
xlab("") +
theme(axis.text.x = element_text(angle = 360-25, size=18,
color=ifelse(orderedMethods %in% gcNormMethods,
"dodgerblue", "black")),
axis.text.y = element_text(size=18),
axis.title.y = element_text(size=18))
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
pSil
cowplot::plot_grid(phavjac[[4]], pSil, labels=letters[1:2],
rel_widths = c(2/3, 1/3))
ggsave("~/Dropbox/research/atacseq/bulk/plots/totalBenchmark_concordance.pdf",
height = 10, width = 20)
Are datasets that use a constant peak width different as compared to datasets using variable peak widths with respect to sample-specific GC-content bias? Note we need to switch the sign again as these were all metrics where a high value was bad.
medianPeakWidths <- readRDS("../../data/medianPeakWidths.rds")
## Hellinger distance with uniform distribution across GC bins for mock
helDistList <- lapply(simulationScores, function(x) x[[1]]$helDistVarGC)
tmmHelDist <- unlist(lapply(helDistList, function(x) x[names(x) == "TMM"]))
df <- data.frame(helDist = -tmmHelDist,
dataset = names(helDistList),
peakSize = "variable")
df$peakSize <- as.character(df$peakSize)
df$peakSize[df$dataset %in% c("bryois2018", "murphy2019")] <- "constant"
df$peakSize <- factor(df$peakSize)
df$medianWidth <- c(301, 492, 385, 980, 485, 201, 458, 500)
pHelUnif <- ggplot(df, aes(x=peakSize, y=helDist, col=dataset)) +
geom_point() +
theme_classic() +
ggtitle("P-val uniform wrt GC") +
theme(legend.position = "none") +
xlab("Peak width") +
ylab("Hellinger distance")
pHelUnifWidth <- ggplot(df, aes(x=medianWidth, y=helDist, col=dataset)) +
geom_point() +
theme_classic() +
ggtitle("P-val uniform wrt GC") +
theme(legend.position = "none") +
xlab("Median peak width") +
ylab("Hellinger distance")
## Hellinger distance with true GC distribution of DA peaks
helDistList <- lapply(simulationScores, function(x) x[[2]]$DAGCDistDiff)
tmmHelDist <- unlist(lapply(helDistList, function(x) x[names(x) == "TMM"]))
df <- data.frame(helDist = -tmmHelDist,
dataset = names(helDistList),
peakSize = "variable")
df$peakSize <- as.character(df$peakSize)
df$peakSize[df$dataset %in% c("bryois2018", "murphy2019")] <- "constant"
df$peakSize <- factor(df$peakSize)
df$medianWidth <- c(301, 492, 385, 980, 485, 201, 458, 500)
pHelDA <- ggplot(df, aes(x=peakSize, y=helDist, col=dataset)) +
geom_point() +
theme_classic() +
ggtitle("GC dist of DA peaks") +
theme(legend.position = "none") +
xlab("Peak width") +
ylab("Cumulative distance")
pHelDAWidth <- ggplot(df, aes(x=medianWidth, y=helDist, col=dataset)) +
geom_point() +
theme_classic() +
ggtitle("GC dist of DA peaks") +
theme(legend.position = "none") +
xlab("Median peak width") +
ylab("Cumulative distance")
## RLE MED GC
rleGCList <- lapply(scores, function(x) x[,"rleGC_med"])
tmmrleGC <- unlist(lapply(rleGCList, function(x) x[grep(x=names(x), pattern="tmm")]))
df <- data.frame(rleGC = -tmmrleGC,
dataset = names(scores),
peakSize = "variable")
df$peakSize <- as.character(df$peakSize)
df$peakSize[df$dataset %in% c("bryois2018", "murphy2019")] <- "constant"
df$peakSize <- factor(df$peakSize)
df$medianWidth <- c(301, 492, 385, 980, 485, 201, 458, 500)
pRLEGC <- ggplot(df, aes(x=peakSize, y=rleGC, col=dataset)) +
geom_point() +
theme_classic() +
ggtitle("RLE_MED GC") +
xlab("Peak width") +
ylab("RLE value")
pRLEGCWidth <- ggplot(df, aes(x=medianWidth, y=rleGC, col=dataset)) +
geom_point() +
theme_classic() +
ggtitle("RLE_MED GC") +
xlab("median peak width") +
ylab("RLE value")
cowplot::plot_grid(pHelUnif, pHelDA, pRLEGC,
nrow=1, labels=letters[1:3],
rel_widths = c(0.6, 0.6, 1))
ggsave("~/Dropbox/research/atacseq/bulk/plots/GCPeakWidth.pdf",
height = 6, width = 11)
cowplot::plot_grid(pHelUnifWidth, pHelDAWidth, pRLEGCWidth,
nrow=1, labels=letters[1:3],
rel_widths = c(0.6, 0.6, 1))
ggsave("~/Dropbox/research/atacseq/bulk/plots/GCPeakmedianWidth.pdf",
height = 6, width = 11)